rm(list=ls())

# load packages
library(tidyverse)
library(commarobust)
library(stargazer)
library(RColorBrewer)
library(sandwich)
library(lmtest)
library(commarobust)
library(data.table)
library(Zelig)

# read in data
df <- fread('merged_gallup.csv')

# fix some vars
df$party <- factor(df$party, levels=c("Republican",'Independent','Democrat'))
df$general_health_poor <- (as.numeric(df$general_health_poor)-1)/4
df$age <- (df$age)/99
df$year_after_2014 <- ifelse(df$year >= 2014,1,0)

# partisanship
df$strong_rep <- ifelse(df$pid5_rep == 1,1,0)
df$weak_rep <- ifelse(df$pid5_rep == 0.75,1,0)
df$weak_dem <- ifelse(df$pid5_rep == 0.25,1,0)
df$strong_dem <- ifelse(df$pid5_rep == 0,1,0)

#health
df$excellent_health <- ifelse(df$general_health_poor == 1,1,0)
df$verygood_health <- ifelse(df$general_health_poor == 0.75,1,0)
df$good_health <- ifelse(df$general_health_poor == 0.5,1,0)
df$fair_health <- ifelse(df$general_health_poor == 0.25,1,0)

df$health_scale <- (df$high_blood_pressure + df$high_cholesterol + df$diabetes + df$depression + df$had_heart_attack + df$cancer)/6
weighted.mean(df$health_scale, na.rm=T, wt=df$wt)
sd(df$health_scale, na.rm=T)

# Correlations

cor(df$general_health_poor, df$health_scale, use='complete.obs')
psych::alpha(cbind(df$high_blood_pressure , df$high_cholesterol , df$diabetes , df$depression , df$had_heart_attack , df$cancer))

# plot
# colors <- brewer.pal(3,"Set1")
# df$monthyear <- paste0(month(df$date), year(df$date))
# subdat %>%
#   filter(age < 65) %>%
#   group_by(monthyear, pid5_rep) %>%
#   summarise(insured = mean(have_health_insurance,na.rm=T)) %>%
#   na.omit() %>%
#   ggplot(aes(monthyear, y=insured, group=pid5_rep, color=pid5_rep)) +
#   geom_point() +
#   stat_smooth() +
#   scale_color_manual(values=c(colors[2],colors[3],colors[1]),name='Party') +
#   theme_bw() +
#   labs(y="Insured", x='')

weighted.mean(df$health_scale, na.rm=T, wt=df$wt)

# modeling
m1 <- lm(formula = have_health_insurance ~ pid5_rep + #main DID estimator
             general_health_poor + #health 
             age + married + college + female + white + latino + black + asian + income_24_60 + income_60_120 + income_over_120 + income_missing + #demos
             as.factor(state), #stateFE
           data=subset(df, age < 65 & year < 2014))
m2 <- lm(have_health_insurance ~ pid5_rep + #main DID estimator
           general_health_poor + #health 
           age + married + college + female + white + latino + black + asian + income_24_60 + income_60_120 + income_over_120 + income_missing + #demos
           as.factor(state), #stateFE
         data=subset(df, age < 65 & year >= 2014))
m3 <- lm(have_health_insurance ~ year_after_2014*(pid5_rep + #main DID estimator
           general_health_poor + #health 
           age + married + college + female + white + latino + black + asian + income_24_60 + income_60_120 + income_over_120 + income_missing + #demos
           as.factor(state)) + as.factor(year), #stateFE
         data=subset(df, age < 65))

form <- have_health_insurance ~ 
    year_after_2014*(strong_rep + weak_rep + weak_dem + strong_dem +
                     excellent_health + verygood_health + good_health + fair_health +
                    age + married + college +
                    female + white + latino + 
                    black + asian + income_24_60 + 
                    income_60_120 + income_over_120 + 
                    income_missing)  + as.factor(state)
subdat <- as.data.frame(subset(df, age < 65))
m1 <- glm(form, subdat, family=binomial(link='logit'))
stargazer(m1, omit=c('state'),
          type='html', style='APSR',
          dep.var.labels=c('Have Health Insurance'),
          add.lines = list(c('State FE', 'Y')),
          star.cutoffs = c(0.05, 0.01, 0.001),
          covariate.labels = c('Post-2014','Strong R (ref=Ind)','Weak R','Weak Dem','Strong Dem',
                               'Excellent Health (ref=poor)','Very Good Health','Good Health','Fair Health',
                               'Age', 'Married', 'College','Female','White','Latino','Black','Asian',
                               'Income $24k-$60k (ref < 24)','Income $60k-$120k','Income Over $120k','Income Missing',
                               'Post-2014 * Strong R (ref=Ind)','Post-2014 * Weak R','Post-2014 * Weak Dem','Post-2014 * Strong Dem',
                               'Post-2014 * Excellent Health (ref=poor)','Post-2014 * Very Good Health','Post-2014 * Good Health','Post-2014 * Fair Health',
                               'Post-2014 * Age', 'Post-2014 * Married', 'Post-2014 * College','Post-2014 * Female','Post-2014 * White','Post-2014 * Latino','Post-2014 * Black','Post-2014 * Asian',
                               'Post-2014 * Income $24k-$60k (ref < 24)','Post-2014 * Income $60k-$120k','Post-2014 * Income Over $120k','Post-2014 * Income Missing'),
          out='gallupout.html')

Model <- zelig(form, data = subdat, model = 'logit')

out1 <- Model %>%
  setx(strong_rep = 1, weak_rep=0, weak_dem=0, strong_dem=0, year_after_2014=0) %>%
  setx1(strong_rep = 1, weak_rep=0, weak_dem=0, strong_dem=0, year_after_2014=1) %>%
  sim()
out2 <- Model %>%
  setx(strong_rep = 0, weak_rep=1, weak_dem=0, strong_dem=0, year_after_2014=0) %>%
  setx1(strong_rep = 0, weak_rep=1, weak_dem=0, strong_dem=0, year_after_2014=1) %>%
  sim()
out3 <- Model %>%
  setx(strong_rep = 0, weak_rep=0, weak_dem=0, strong_dem=0, year_after_2014=0) %>%
  setx1(strong_rep = 0, weak_rep=0, weak_dem=0, strong_dem=0, year_after_2014=1) %>%
  sim()
out4 <- Model %>%
  setx(strong_rep = 0, weak_rep=0, weak_dem=1, strong_dem=0, year_after_2014=0) %>%
  setx1(strong_rep = 0, weak_rep=0, weak_dem=1, strong_dem=0, year_after_2014=1) %>%
  sim()
out5 <- Model %>%
  setx(strong_rep = 0, weak_rep=0, weak_dem=0, strong_dem=1, year_after_2014=0) %>%
  setx1(strong_rep = 0, weak_rep=0, weak_dem=0, strong_dem=1, year_after_2014=1) %>%
  sim()

out6 <- Model %>%
  setx(excellent_health = 1, verygood_health=0, good_health=0, fair_health=0, year_after_2014=0) %>%
  setx1(excellent_health = 1, verygood_health=0, good_health=0, fair_health=0, year_after_2014=1) %>%
  sim()
out7 <- Model %>%
  setx(excellent_health = 0, verygood_health=1, good_health=0, fair_health=0, year_after_2014=0) %>%
  setx1(excellent_health = 0, verygood_health=1, good_health=0, fair_health=0, year_after_2014=1) %>%
  sim()
out8 <- Model %>%
  setx(excellent_health = 0, verygood_health=0, good_health=1, fair_health=0, year_after_2014=0) %>%
  setx1(excellent_health = 0, verygood_health=0, good_health=1, fair_health=0, year_after_2014=1) %>%
  sim()
out9 <- Model %>%
  setx(excellent_health = 0, verygood_health=0, good_health=0, fair_health=1, year_after_2014=0) %>%
  setx1(excellent_health = 0, verygood_health=0, good_health=0, fair_health=1, year_after_2014=1) %>%
  sim()
out10 <- Model %>%
  setx(excellent_health = 0, verygood_health=0, good_health=0, fair_health=0, year_after_2014=0) %>%
  setx1(excellent_health = 0, verygood_health=0, good_health=0, fair_health=0, year_after_2014=1) %>%
  sim()


gallup_all <- data.frame(x=c('Strong Republican', 'Lean Republican', "Independent", 'Lean Democrat','Strong Democrat',
               'Poor Health','Fair Health','Good Health','Very Good Health','Excellent Health'),
           y=c(mean(as.data.frame(out1$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out2$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out3$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out4$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out5$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out6$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out7$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out8$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out9$sim.out$x1$fd)[,1]),
               mean(as.data.frame(out10$sim.out$x1$fd)[,1])),
           lower=c(quantile(as.data.frame(out1$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out2$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out3$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out4$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out5$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out6$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out7$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out8$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out9$sim.out$x1$fd)[,1],0.025),
                   quantile(as.data.frame(out10$sim.out$x1$fd)[,1],0.025)),
           upper=c(quantile(as.data.frame(out1$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out2$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out3$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out4$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out5$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out6$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out7$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out8$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out9$sim.out$x1$fd)[,1],0.975),
                   quantile(as.data.frame(out10$sim.out$x1$fd)[,1],0.975)))

gallup_all %>%
  mutate(x=factor(x,levels=c('Strong Republican', 'Lean Republican',
                             "Independent", 'Lean Democrat','Strong Democrat',
               'Excellent Health','Very Good Health','Good Health', 
               'Fair Health','Poor Health')),
         facet=c(rep("Symbolic",5),rep('Self-Interest',5))) %>%
  ggplot(aes(x, y)) + 
  #geom_errorbar(aes(x, ymin=lower, ymax=upper), width=0.05) +
  #geom_point() +
  #geom_hline(yintercept=0, linetype=2, color='red') +
  theme_bw() + 
  labs(y='Change Pr(Insured) Pre-Post 2014', x='') +
  coord_flip() +
  facet_grid(facet~.,switch = 'y',scales = 'free_y',space='free_y') +
  ggsave(width=6,height=6,file='gallup1.png')

save <- gallup_all %>%
  mutate(x=factor(x,levels=c('Strong Republican', 'Lean Republican', "Independent", 'Lean Democrat','Strong Democrat',
                             'Excellent Health','Very Good Health','Good Health', 'Fair Health','Poor Health')),
         facet=c(rep("Symbolic",5),rep('Self-Interest',5)))
save[1:5,2:4] <- NA  
save %>% ggplot(aes(x, y)) + 
  geom_errorbar(aes(x, ymin=lower, ymax=upper), width=0.05) +
  geom_point(size=2.5,fill='white',shape=21) +
  geom_hline(yintercept=0, linetype=2, color='red') +
  theme_bw() + 
  labs(y='Change Pr(Insured) Pre-Post 2014', x='') +
  coord_flip() +
  facet_grid(facet~.,switch = 'y',scales = 'free_y',space='free_y') +
  ggsave(width=6,height=6,file='gallup2.png')

gallup_all %>%
  mutate(x=factor(x,levels=c('Strong Republican', 'Lean Republican', "Independent", 'Lean Democrat','Strong Democrat',
                             'Excellent Health','Very Good Health','Good Health', 'Fair Health','Poor Health')),
         facet=c(rep("Symbolic",5),rep('Self-Interest',5))) %>%
  ggplot(aes(x, y)) + 
  geom_errorbar(aes(x, ymin=lower, ymax=upper), width=0.05) +
  geom_point(size=2.5,fill='white',shape=21) +
  geom_hline(yintercept=0, linetype=2, color='red') +
  theme_bw() + 
  labs(y='Change Pr(Insured) Pre-Post 2014', x='') +
  coord_flip() +
  facet_grid(facet~.,switch = 'y',scales = 'free_y',space='free_y') +
  ggsave(width=6,height=6,file='gallup3.png')

# Objective self interest

form <- have_health_insurance ~ 
  year_after_2014*(strong_rep + weak_rep + weak_dem + strong_dem +
                     health_scale +
                     age + married + college +
                     female + white + latino + 
                     black + asian + income_24_60 + 
                     income_60_120 + income_over_120 + 
                     income_missing)  + as.factor(state)
subdat <- as.data.frame(subset(df, age < 65))
m1 <- glm(form, subdat, family=binomial(link='logit'))
stargazer(m1, omit=c('state'),
          type='html', style='APSR',
          dep.var.labels=c('Have Health Insurance'),
          add.lines = list(c('State FE', 'Y')),
          star.cutoffs = c(0.05, 0.01, 0.001),
          covariate.labels = c('Post-2014','Strong R (ref=Ind)','Weak R','Weak Dem','Strong Dem',
                               'Objective Health Scale',
                               'Age', 'Married', 'College','Female','White','Latino','Black','Asian',
                               'Income $24k-$60k (ref < 24)','Income $60k-$120k','Income Over $120k','Income Missing',
                               'Post-2014 * Strong R (ref=Ind)','Post-2014 * Weak R','Post-2014 * Weak Dem','Post-2014 * Strong Dem',
                               'Post-2014 * Objective Health Scale',
                               'Post-2014 * Age', 'Post-2014 * Married', 'Post-2014 * College','Post-2014 * Female','Post-2014 * White','Post-2014 * Latino','Post-2014 * Black','Post-2014 * Asian',
                               'Post-2014 * Income $24k-$60k (ref < 24)','Post-2014 * Income $60k-$120k','Post-2014 * Income Over $120k','Post-2014 * Income Missing'),
          out='gallupout_robust.html')





# pre <- Model %>%
#   setx(pid5_rep = seq(0,1,length.out=5),
#        year_after_2014=0) %>%
#   sim()
# post <- Model %>%
#   setx(pid5_rep = seq(0,1,length.out=5),
#        year_after_2014=1) %>%
#   sim()
# 
# before <- data.frame(Party=1:5,y=rep(NA, 5), lower=NA, upper=NA, Time='Before Rollout')
# after <- data.frame(Party=1:5,y=rep(NA, 5), lower=NA, upper=NA, Time='After Rollout')
# 
# for(i in 1:5){
#   before[i,2] <- mean(as.data.frame(pre$sim.out$range[[i]]$ev)[,1])
#   before[i,3] <- quantile(as.data.frame(pre$sim.out$range[[i]]$ev)[,1],0.025)
#   before[i,4] <- quantile(as.data.frame(pre$sim.out$range[[i]]$ev)[,1],0.975)
#   after[i,2] <- mean(as.data.frame(post$sim.out$range[[i]]$ev)[,1])
#   after[i,3] <- quantile(as.data.frame(post$sim.out$range[[i]]$ev)[,1],0.025)
#   after[i,4] <- quantile(as.data.frame(post$sim.out$range[[i]]$ev)[,1],0.975)
# }
# g.out.party <- bind_rows(before, after) %>%
#   ggplot(aes(Party, y, color=Time)) +
#   geom_line() +
#   geom_line(aes(Party, y=lower, group=Time), linetype=2) +
#   geom_line(aes(Party, y=upper, group=Time), linetype=2) +
#   theme_bw()+
#   scale_x_continuous(labels=c('Strong D','Weak D', "Ind", "Weak R","Strong R")) +
#   labs(y='E(Insured)') 
# g.out.party
# 
# preH <- Model %>%
#   setx(general_health_poor = seq(0,1,length.out=5),
#        year_after_2014=0) %>%
#   sim()
# postH <- Model %>%
#   setx(general_health_poor = seq(0,1,length.out=5),
#        year_after_2014=1) %>%
#   sim()
# 
# before <- data.frame(PoorHealth=1:5,y=rep(NA, 5), lower=NA, upper=NA, Time='Before Rollout')
# after <- data.frame(PoorHealth=1:5,y=rep(NA, 5), lower=NA, upper=NA, Time='After Rollout')
# 
# for(i in 1:5){
#   before[i,2] <- mean(as.data.frame(preH$sim.out$range[[i]]$ev)[,1])
#   before[i,3] <- quantile(as.data.frame(preH$sim.out$range[[i]]$ev)[,1],0.025)
#   before[i,4] <- quantile(as.data.frame(preH$sim.out$range[[i]]$ev)[,1],0.975)
#   after[i,2] <- mean(as.data.frame(postH$sim.out$range[[i]]$ev)[,1])
#   after[i,3] <- quantile(as.data.frame(postH$sim.out$range[[i]]$ev)[,1],0.025)
#   after[i,4] <- quantile(as.data.frame(postH$sim.out$range[[i]]$ev)[,1],0.975)
# }
# 
# g.out.health <- bind_rows(before, after) %>%
#   ggplot(aes(PoorHealth, y, color=Time)) +
#   geom_line() +
#   geom_line(aes(PoorHealth, y=lower, group=Time), linetype=2) +
#   geom_line(aes(PoorHealth, y=upper, group=Time), linetype=2) +
#   theme_bw()+
#   scale_x_continuous(labels=c('Excellent','Very Good', "Good", "Fair","Poor")) +
#   labs(y='E(Insured)') 
# g.out.health




# missing data ------------------------------------------------------------

df$year_factor <- factor(df$year)
summary(lm(is.na(party) ~ age*year_factor + female*year_factor + college*year_factor + white*year_factor + married*year_factor + income_under_24*year_factor, df ))
summary(lm(is.na(have_health_insurance) ~ age*year_factor + female*year_factor + college*year_factor + white*year_factor + married*year_factor + income_under_24*year_factor, df ))
summary(lm(is.na(general_health_poor) ~ age*year_factor + female*year_factor + college*year_factor + white*year_factor + married*year_factor + income_under_24*year_factor, df ))
ß
df %>%
  group_by(year, ismissing = is.na(party)) %>%
  summarise(age = mean(age, na.rm=T),
            female=mean(female,na.rm=T),
            educ = mean(college,na.rm=T),
            white= mean(white,na.rm=T),
            married = mean(married, na.rm=T),
            income_under_24 = mean(income_under_24, na.rm=T)) %>%
  filter(ismissing==T) %>%
  gather(variable, value, 3:8) %>%
  ggplot(aes(year,value)) +
  geom_bar(stat='identity') + 
  theme_bw() + 
  facet_wrap(~variable, scales='free_y')


df %>%
  group_by(year, ismissing = is.na(general_health_poor)) %>%
  summarise(age = mean(age, na.rm=T),
            female=mean(female,na.rm=T),
            educ = mean(college,na.rm=T),
            white= mean(white,na.rm=T),
            married = mean(married, na.rm=T),
            income_under_24 = mean(income_under_24, na.rm=T)) %>%
  filter(ismissing==T) %>%
  gather(variable, value, 3:8) %>%
  ggplot(aes(year,value)) +
  geom_bar(stat='identity') + 
  theme_bw() + 
  facet_wrap(~variable, scales='free_y')


df %>%
  group_by(year, ismissing = is.na(have_health_insurance)) %>%
  summarise(age = mean(age, na.rm=T),
            female=mean(female,na.rm=T),
            educ = mean(college,na.rm=T),
            white= mean(white,na.rm=T),
            married = mean(married, na.rm=T),
            income_under_24 = mean(income_under_24, na.rm=T)) %>%
  filter(ismissing==T) %>%
  gather(variable, value, 3:8) %>%
  ggplot(aes(year,value)) +
  geom_bar(stat='identity') + 
  theme_bw() + 
  facet_wrap(~variable, scales='free_y')



